In [4]:
import holoviews as hv
# Select Bokeh as the HoloViews rendering backend for this notebook.
hv.extension('bokeh')

# Default plot size applied to HoloViews Graph elements.
%opts Graph [width=600 height=400]

Manufacturing Maintenance Case Study

Thurston Sexton + Mike Brundage

Measuring Machine Performance

Failure Inter-arrival Times, by Machine

In [120]:
import warnings
# Blanket-suppress warnings so library chatter does not clutter the figures.
# NOTE(review): this hides *every* warning category for the rest of the session.
warnings.simplefilter(action='ignore')

import matplotlib.gridspec as gridspec

# --- Select "broken" work orders on H- and I-type machines -------------------
# A machine id qualifies when it is the letter H or I followed only by digits;
# rows with a missing MACH are treated as non-matching.
h_or_i = df.MACH.str.match(r'^[HI][0-9]*$').fillna(False)
is_broke = (tag_df.P['broken']>0)
cond = h_or_i & is_broke

idx_col = pd.DatetimeIndex(df['DATE RECEIVED'])

# Keep only the tag columns whose total over the selected rows exceeds 1, then
# drop the outer column level so columns are addressed by bare tag name.
sample_tag = tag_df.loc[cond, tag_df.loc[cond].sum()>1]
sample_tag.columns = sample_tag.columns.droplevel(0)

sample_tag = pd.concat([sample_tag, df.MACH[cond]], axis=1)
sample_tag['date'] = idx_col[cond]
sample_tag['mach_type'] = sample_tag.MACH.str[0]

# Days between consecutive "broken" records of the same machine.  The diff is
# computed on a (MACH, date)-sorted copy and assigned back by index label, so
# the first record of each machine has no predecessor and stays NaN.
# The column is created directly as float days: writing floats into an existing
# timedelta column through `.loc[:, 'tbf'] = ...` is dtype-fragile on newer
# pandas, which tries to set values in place.
# NOTE(review): the label-based assignment assumes the row index is unique.
sample_tag['tbf'] = (
    sample_tag.sort_values(['MACH','date']).groupby('MACH')['date'].diff()
    / pd.Timedelta(days=1)
)

samps = sample_tag[['mach_type', 'tbf', 'MACH']].dropna()
# Machines ordered by how many intervals they contribute (value_counts sorts
# descending), so order[:20] below picks the 20 most frequent machines.
order = samps.MACH.value_counts().index

# --- Figure: box plot on the left, right-hand column filled by a later block -
fig = plt.figure(tight_layout=True, figsize=(12,8))
gs = gridspec.GridSpec(2, 2)

ax1 = fig.add_subplot(gs[:,0])
sns.boxplot(data=samps, y='MACH', x='tbf',
            hue='mach_type', orient='h',
            order=order[:20], notch=False,
            ax=ax1)
ax1.set(xlabel='days', xlim=(0,250),
        title='Time Between Failure ("broken")');
    
from lifelines import WeibullFitter, ExponentialFitter, KaplanMeierFitter

def mask_to_ETraw(df_clean, mask, fill_null=1.):
    """Build duration / event-indicator series for survival fitting.

    Parameters
    ----------
    df_clean : DataFrame
        Work-order table; the 'DATE RECEIVED' and 'MACH' columns are read.
    mask : boolean Series
        Row selector applied to ``df_clean`` (and to the event indicator).
    fill_null : float, optional
        Accepted for interface compatibility; the body does not read it.

    Returns
    -------
    (T, E) : tuple of Series
        ``T`` is the gap, in days, between consecutive selected records of the
        same machine; ``E`` is 1 where the 'replaced' tag count is not
        positive and 0 otherwise.  Both are restricted to rows where the gap
        is defined and strictly positive.

    Notes
    -----
    The event indicator is read from the notebook-level ``tag_df``, not from
    ``df_clean``.
    """
    one_day = pd.Timedelta(days=1)
    selected = df_clean.loc[mask].sort_values('DATE RECEIVED')
    gaps = selected.groupby('MACH')['DATE RECEIVED'].transform(pd.Series.diff)
    durations = gaps/one_day

    # assume censored when parts replaced (changeout)
    was_replaced = tag_df.S['replaced']>0
    observed = (~was_replaced).astype(int)[mask]

    usable = durations.notna()&(durations>0.)
    return durations[usable], observed[usable]

# Right-hand column of the figure: pooled survival curve on top, per-type
# curves underneath, both panels sharing one x-axis.
ax3 = fig.add_subplot(gs[-1,-1])
ax2 = fig.add_subplot(gs[0,-1], sharex=ax3)

kmf = KaplanMeierFitter()

# Pooled estimate over every selected H- and I-type "broken" record.
T, E = mask_to_ETraw(df, cond)
kmf.fit(T, event_observed=E, label='Machine K-M')
kmf.plot(show_censors=True, censor_styles={'marker':'|'}, ax=ax2, color='xkcd:gray')
ax2.set(xlim=(0,250), ylabel=r'$S(t)$', title='Kaplan-Meier Survival Function');

# Separate estimates per machine type, overlaid on the lower panel.  The same
# fitter object is re-fit for each type, so it ends up holding the H-type fit.
i_ = df.MACH.str.match(r'^[I][0-9]*$').fillna(False)
h_ = df.MACH.str.match(r'^[H][0-9]*$').fillna(False)
for type_mask, curve_label in ((i_, 'I-type K-M'), (h_, 'H-type K-M')):
    T, E = mask_to_ETraw(df, type_mask&is_broke)
    kmf.fit(T, event_observed=E, label=curve_label)
    kmf.plot(show_censors=True, censor_styles={'marker':'|'}, ax=ax3)

ax3.set(xlim=(0,250), ylabel=r'$S(t)$', xlabel='days');
<Figure size 360x720 with 0 Axes>

Markers ( | ) indicate a censored observation, interpreted here as a maintenance event in which parts were replaced (a 'replaced' tag occurrence).

Top Tag occurrences, by Machine

In [124]:
# top3 = sample_tag.MACH.isin(['H34', 'I19', 'H14'])
# sample_tag[top3, sample_tag.loc[top3,:].sum()>3].groupby('MACH').sum().plot(kind='bar')
# tag_df[sample_tag.MACH=='H34'].sum()

def machine_tags(name, n_reps):
    """Return the frequently-occurring tags for one machine.

    Rows of ``tag_df`` are kept where the 'MACH' column of ``df`` contains
    ``name`` (case-insensitive; rows with a missing 'MACH' are excluded).
    Columns are kept where their sum over those rows is strictly greater
    than ``n_reps``.

    Both ``df`` and ``tag_df`` are read from the notebook namespace.
    """
    on_machine = df['MACH'].str.contains(name, case =False).fillna(False)
    tag_totals = tag_df.loc[on_machine,:].sum()
    frequent = (tag_totals>n_reps).values
    return tag_df.loc[on_machine,frequent]
# One horizontal bar chart of tag totals per machine, drawn side by side under
# a temporary seaborn style and plotting context.
with sns.axes_style('whitegrid') as style:
    with sns.plotting_context('talk') as context:
        f, ax = plt.subplots(ncols=3, figsize=(15, 5))

        for n, mach in enumerate(['H34', 'I19', 'H14']):
            panel = ax[n]
            # Totals of the tags whose count exceeds 6 for this machine, ascending.
            mach_df = machine_tags(mach,  6).sum().sort_values()
            # Bar colour is looked up from the outer level of the tag index.
            bar_colors = [colors[group] for group in mach_df.index.get_level_values(0)]
            mach_df.plot(kind='barh', color=bar_colors, ax=panel)
            panel.set_title(mach)
        plt.tight_layout()
  • H34 issues with motor, brush_unit
  • I19 alarms and/or sensors, potentially coolant-related
  • H14 wide array of issues, including operator (!?)
In [132]:
%%output size=150 backend='bokeh' filename='machs'
%%opts Text (text_align='right')
%%opts Graph (edge_line_width=4 node_line_color='white', node_size=1)
%%opts EdgePaths [color_index='weight'] (line_width=1, cmap='viridis', color='dodgerblue', alpha=.2)
%%opts Overlay [width=300 legend_position='top_right'] Layout [tabs=True]
%%opts Nodes (size='size' line_color='white')
# padding = dict(x=(-0.05, 1.05), y=(-0.05, 1.05))
hv.Text

kws = {'layout':nx.drawing.spring_layout,
#        'layout_kws':{'prog':'neato'} 
       'padding':dict(x=(-0.05, 1.05), y=(-0.05, 1.05))
      }

layout = hv.Layout([hv_net(machine_tags("H34",  5), name='H34',**kws),
                    hv_net(machine_tags("I19",  5), name='I19',**kws),
                    hv_net(machine_tags("H14",  5), name='H14',**kws)
                   ])
layout
Out[132]:

Measuring Technician Performance

In [87]:
# Work orders whose technician name contains 'Lyle Cookson' (missing names are
# treated as non-matching), tallied by their free-text description.
islyle = df['Tech Full Name'].str.contains('Lyle Cookson').fillna(False)
df.loc[islyle, 'Description'].value_counts()
Out[87]:
Base cleaning requested                                              11
Base needs to be cleaned                                              8
Clean base                                                            4
Base cleaning                                                         3
Base clean                                                            3
Base required cleaning                                                2
Cooling unit faults                                                   2
Base cleaning req                                                     2
Clean base -coolant sticky                                            1
Parts receiver prox cable shorting sensor                             1
Clean out Sinico                                                      1
Shipping cart has worn wheels                                         1
Chips in base obstructin coolant flow to pump                         1
Base full                                                             1
Base cleaning Requested                                               1
Coolant tank needs to be cleaned                                      1
Base needs to be cleaned -Opers overfilling and spilling on floor     1
Base cleaning -caused fire                                            1
Clean base to install SS chip catcher                                 1
Base has hydraulic fluid -Drain/Clean                                 1
Drain and clean tank -Do not refill                                   1
Base cleaning requested -Oil lines clogging                           1
Repair paper filter system                                            1
Coolant base needs to be cleaned                                      1
Name: Description, dtype: int64
In [121]:
# NOTE(review): this expression is not the last one in the cell, so its value
# (the descriptions of Lyle Cookson's work orders) is computed and discarded.
df['Description'][df['Tech Full Name'].str.contains('Lyle Cookson').fillna(False)]

def person_tags(name, n_reps):
    """Return the frequently-occurring tags for one technician.

    Rows of ``tag_df`` are kept where the 'Tech Full Name' column of ``df``
    contains ``name`` (case-sensitive; rows with a missing name are excluded).
    Columns are kept where their sum over those rows is strictly greater
    than ``n_reps``.

    Both ``df`` and ``tag_df`` are read from the notebook namespace.
    """
    by_tech = df['Tech Full Name'].str.contains(name).fillna(False)
    tag_totals = tag_df.loc[by_tech,:].sum()
    frequent = (tag_totals>n_reps).values
    return tag_df.loc[by_tech,frequent]

# Per-technician tag frames, each with its own occurrence threshold.
# NOTE(review): `andrew_tags` holds the frame for 'Anthony Paolillo'.
lyle_tags = person_tags('Lyle Cookson', 5)
steve_tags = person_tags('Steve Andreozzi', 20)
andrew_tags = person_tags('Anthony Paolillo', 10)

# Tag totals for Lyle in ascending order, so the largest bar is drawn on top.
mach_df = lyle_tags.sum().sort_values()
# Bar colour is looked up from the outer level of the tag index.
bar_colors = [colors[group] for group in mach_df.index.get_level_values(0)]

plt.figure(figsize=(5,5))
mach_df.plot(kind='barh', color=bar_colors)
plt.title('Lyle')
Out[121]:
Text(0.5,1,'Lyle')

Thresholded to tags occurring more than 5 times

  • we can quickly gauge the number of Lyle's total "base cleanings" as 45-50

Say we want to compare with other, more "typical" technicians... $\rightarrow$ small problem...

In [91]:
# with sns.axes_style('whitegrid') as style,\
# sns.plotting_context('talk') as context:
f, ax = plt.subplots(ncols=3, figsize=(15, 5))
# NOTE(review): `thres` is never read below; every technician is filtered with
# the same threshold of 5. Confirm whether per-technician thresholds were
# intended before changing the figure.
thres = [5, 20, 10]
technicians = ['Lyle Cookson', 'Steve Andreozzi', 'Anthony Paolillo']
for n, mach in enumerate(technicians):
    panel = ax[n]
    # Tag totals for this technician, ascending.
    mach_df = person_tags(mach,  5).sum().sort_values()
    # Bar colour is looked up from the outer level of the tag index.
    bar_colors = [colors[group] for group in mach_df.index.get_level_values(0)]
    mach_df.plot(kind='barh', color=bar_colors, ax=panel)
    # Panels are titled with the technician's first name only.
    first_name = mach.split(' ')[0]
    panel.set_title(first_name)
plt.tight_layout()
In [133]:
%%output size=150 backend='bokeh' filename='techs'
%%opts Graph (edge_line_width=4 node_line_color='white', node_size=1)
%%opts EdgePaths [color_index='weight'] (line_width=1, cmap='viridis', color='dodgerblue', alpha=.2)
%%opts Overlay [width=300 legend_position='top_right'] Layout [tabs=True]
%%opts Nodes (size='size' line_color='white')
padding = dict(x=(-0.05, 1.05), y=(-0.05, 1.05))


kws = {'layout':nx.drawing.spring_layout, 
#        'layout_kws':{'prog':'neatopusher'} 
       'padding':dict(x=(-0.05, 1.05), y=(-0.05, 1.05))
      }
layout = hv.Layout([hv_net(person_tags('Lyle Cookson', 1), name='Lyle',**kws),
                    hv_net(person_tags('Steve Andreozzi', 20), name='Steve',**kws),
                    hv_net(person_tags('Anthony Paolillo', 10), name='Anthony',**kws),
                    hv_net(person_tags("Norm Neveux", 1), name='Norm',**kws),
                    hv_net(person_tags("Doug Patrick", 1), name='Doug',**kws),
                    hv_net(person_tags("Colin Doherty", 5), name='Colin',**kws)
                   ])#.cols(1)
# print(layout)
layout
# graph.edgepaths*graph.nodes
Out[133]:

Finding & Tracking Hot-spots

HVAC Case study

In [151]:
from mlp import plot
# Timestamp of every record, used as the time axis for the HVAC tags.
idx_col = pd.DatetimeIndex(df.REPORTDATE)
samp = ['air_conditioning_unit', 'too_hot', 'too_cold']
# NOTE(review): `cond` is assigned here but not read anywhere in this cell.
cond = (tag_df.P.alarm==1)
plot_years = [2009, 2010, 2016]

# Pull the sampled tags from every outer column group, then drop the outer
# level so columns are addressed by bare tag name.
sample_tag = tag_df.loc[:,(slice(None), samp)]
sample_tag.columns = sample_tag.columns.droplevel(0)

# Re-index by report date and keep only the years being plotted.
sample_tag = sample_tag.set_index(idx_col)
in_plot_years = sample_tag.index.year.isin(plot_years)
sample_tag = sample_tag[in_plot_years]

plot.calendarplot(sample_tag,
                  how='sum', fig_kws={'figsize':(13,4)});
plt.suptitle('Tag Occurence')
Out[151]:
Text(0.5,0.98,'Tag Occurence')
In [155]:
# .rolling(1000).sum()
# NOTE: relies on `idx_col` (the report-date index) built in the previous cell.
samp = ['too_cold', 'too_hot']
sample_tag = tag_df.loc[:,(slice(None), samp)]
sample_tag.columns = sample_tag.columns.droplevel(0)

sample_tag = sample_tag.set_index(idx_col).sort_index()

# Trailing 30-day running totals, summarised per 30-day bin: the bin mean is
# the band centre and three bin standard deviations set its half-width.
roll = sample_tag.rolling('30D').sum()
mean = roll.resample('30D').mean()
err = roll.resample('30D').std()

plt.figure(figsize=(15,3))
for tag, shade in (('too_hot', 'xkcd:orangered'),
                   ('too_cold', 'xkcd:turquoise blue')):
    # Dotted line: raw tag total in each 30-day bin.
    plt.plot(sample_tag[tag].resample('30D').sum(),
             marker='.', ls=':', color=shade, label=tag)
    # Shaded band: mean +/- 3 standard deviations of the running total.
    plt.fill_between(mean.index, mean[tag]-3*err[tag], mean[tag]+3*err[tag],
                     alpha=.4, color=shade)
sns.despine()
plt.legend()
plt.title('Monthly tag-occurence estimate')
Out[155]:
Text(0.5,1,'Monthly tag-occurence estimate')
In [166]:
%%output size=150 filename='nist_hvac_map'
%%opts Polygons [height=350 width=300, tools=['hover'] colorbar=False ] (cmap='RdBu')

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# , bounds = (-77.222, 39.13, -77.215, 39.14)
# bldg_dict[times[11]].cols(1)
# hv.HoloMap(bldg_dict)

(hv.HoloMap(bldg_dict, 'Time')*text + hv.HoloMap(vlines, 'Time')*temp_curves.opts(temp_curve_spec)).cols(1)
Out[166]:

Questions/Comments?